
** build master county-quarter regression file 
** Starts from the county list, expands it to one row per county x year x
** quarter (years 2006-2019), merges in the environmental, population,
** labor-market, geography, census and weather inputs, derives per-capita
** outcomes, fixed-effect group ids and differences, and saves the result
** to 1_build/regdata/county_quarter.dta.
if 1 {
	
	* list of counties 
	use "$Rep_smokelabor/1_build/geo/ls_county.dta", clear
	
	* expand to yq
	* 14 copies per county, numbered 2006 through 2019
	expand 14
	bys countyfip: gen rfrnc_yr=_n+2005
	
	* 4 copies per county-year, numbered 1 through 4
	expand 4 
	bys countyfip rfrnc_yr: gen rfrnc_qtroy=_n
	tab rfrnc_yr rfrnc_qtroy
	
	* merge in enviro vars 
	* the smoke file is keyed on a 5-character zero-padded string county code
	tostring countyfip, gen(COUNTY10) format(%05.0f)
	assert strlen(COUNTY10)==5
	
	* the quarter variable is temporarily renamed to match the key name used in the smoke file
	ren rfrnc_qtroy rfrnc_qtr
	merge 1:1 COUNTY10 rfrnc_yr rfrnc_qtr using "$Rep_smokelabor/1_build/hms/raw/county_smoke_quarterly.dta", keepusing(hms_* o3 so2 no2 pm25 pm10 tavg ppt) keep(match master) nogen
	ren rfrnc_qtr rfrnc_qtroy
	
	* convert ozone from ppm to ppb for readability
	replace o3 = o3*1000
	
	* seer population counts
	merge m:1 countyfip rfrnc_yr using "$Rep_smokelabor/1_build/seer/proc/seer_county_year.dta", keep(match master) nogen
	
	* qwi 
	merge 1:1 countyfip rfrnc_yr rfrnc_qtroy using "$Rep_smokelabor/1_build/qwi/proc/qwi_county_quarterly.dta", keep(match master) nogen 
	
	* lau 
	merge 1:1 countyfip rfrnc_yr rfrnc_qtroy using "$Rep_smokelabor/1_build/lau/proc/lau_county_quarterly.dta", keep(match master) nogen 
	
	* per capita
	gen pc_qwi_payroll = qwi_payroll / seer_pop
	
	* per million age 16+
	gen pmil_qwi_emptotal = (qwi_emptotal / seer_pop16plus)*1000000
	gen pmil_lau_lfp = (lau_lfp / seer_pop16plus)*1000000
	
	* state, division, region code
	* state code is the first two characters of the 5-character county code
	gen statefip = substr(COUNTY10,1,2)
	destring statefip, replace 
	merge m:1 statefip using "$Rep_smokelabor/1_build/geo/state_to_region_division_cw.dta", keep(match master) nogen
	
	* fes
	egen fe_styr = group(statefip rfrnc_yr)
	egen fe_divisionyr = group(divisionfip rfrnc_yr)
	egen fe_regionyr = group(regionfip rfrnc_yr)
	egen fe_countyqtroy=group(countyfip rfrnc_qtroy)
	* quarter-of-sample index encoded as year*100 + quarter, e.g. 200601
	gen rfrnc_qtros=rfrnc_yr*100+rfrnc_qtroy
	egen fe_stqtros=group(statefip rfrnc_qtros)
	egen fe_divisionqtros=group(divisionfip rfrnc_qtros)
	egen fe_regionqtros=group(regionfip rfrnc_qtros)
	
	* first diff
	* panel unit is county x quarter-of-year and the time variable is the year,
	* so L1. refers to the same quarter one year earlier
	tsset fe_countyqtroy rfrnc_yr
	foreach v of varlist pm25 hms_deep pc_qwi_payroll pmil_qwi_emptotal pmil_lau_lfp {
		gen d_`v'=`v'-L1.`v'
	}
	
	* merge census county X's 
	merge m:1 countyfip using "$Rep_smokelabor/1_build/nhgis/proc/nhgis_county_Xs.dta", keep(match master) nogen 
	
	* calculate county avg pm2.5
	* NOTE(review): gegen is not a built-in command; presumably supplied by the gtools package - confirm it is installed
	bys countyfip: gegen avg_pm25=mean(pm25)
	
	* create dummies for above median counties
	* the median is read straight from r(p50) rather than through macro
	* expansion: expanding the result into text can round it, which may
	* misclassify observations that sit exactly at the median
	foreach var in cen2010_fracurb acs0716_pov100 acs0716_p50homeval acs0716_fracblack avg_pm25 {
		qui summ `var', d
		gen up50_`var' = (`var'>=r(p50)) if !mi(`var')
	}
	
	* merge narr weather vars 
	merge 1:1 countyfip rfrnc_qtros using "$Rep_smokelabor/1_build/weather/raw/narr_county_quarterly.dta", keep(match master) nogen
	* 10 quantile groups of precipitation and wind speed
	egen g_ppt=cut(ppt), g(10)
	egen g_wspd=cut(wspd), g(10)
	
	* labels 
	* NOTE(review): the two per-million variables are scaled by seer_pop16plus,
	* while their labels say per million people - confirm the wording is intended
	lab var rfrnc_yr "Reference year"
	lab var rfrnc_qtroy "Reference quarter of year"
	lab var pc_qwi_payroll "Per capita QWI payroll"
	lab var pmil_qwi_emptotal "QWI employment per million people"
	lab var pmil_lau_lfp "LAU LFP per million people"
	lab var statefip "FIPs state"
	lab var fe_styr "FEs: state by year"
	lab var fe_divisionyr "FEs: census division by year"
	lab var fe_regionyr "FEs: census region by year"
	lab var fe_countyqtroy "FEs: county by quarter-of-year"
	lab var rfrnc_qtros "Reference quarter-of-sample"
	lab var fe_stqtros "FEs: state by quarter-of-sample"
	lab var fe_divisionqtros "FEs: census division by quarter-of-sample"
	lab var fe_regionqtros "FEs: census region by quarter-of-sample"
	
	* save 
	compress
	saveold "$Rep_smokelabor/1_build/regdata/county_quarter.dta", replace
}

** build age-county-quarter regression file 
** Reloads the county-quarter file, replicates every row once per age group
** (codes A04 through A08), attaches the age-group-specific QWI and SEER
** data, and recomputes the per-capita outcomes at the age-group level.
if 1 {
	
	use "$Rep_smokelabor/1_build/regdata/county_quarter.dta", clear 
	
	* remove the county-level population and labor variables (and anything
	* derived from them) so the age-group versions can be merged in below
	drop seer_* *qwi_* *lau_* 
	
	* replicate each county-quarter row five times, one copy per age group
	sort countyfip rfrnc_qtros
	gen _row = _n
	expand 5 
	bys _row: gen _slot=_n 
	
	* map copy number 1..5 onto the age-group codes used as the merge key
	gen agegrp="."
	local k = 0
	foreach code in A04 A05 A06 A07 A08 {
		local ++k
		replace agegrp="`code'" if _slot==`k'
	}
	
	tab agegrp 
	drop _row _slot
	
	* age-group by county by quarter QWI outcomes
	merge 1:1 countyfip rfrnc_yr rfrnc_qtroy agegrp using "$Rep_smokelabor/1_build/qwi/proc/qwi_agegrp_county_quarterly.dta", keep(match master) nogen
	
	* age-group by county by year SEER population
	merge m:1 countyfip rfrnc_yr agegrp using "$Rep_smokelabor/1_build/seer/proc/seer_agegrp_county_year.dta", keep(match master) nogen
		
	* scale outcomes by the merged seer_pop: payroll per person, employment per million
	gen pc_qwi_payroll = qwi_payroll/seer_pop
	gen pmil_qwi_emptotal = qwi_emptotal*1000000/seer_pop
	
	* write out
	compress
	saveold "$Rep_smokelabor/1_build/regdata/agegrp_county_quarter.dta", replace
} 

** build industry-county-quarter regression file 
** Reloads the county-quarter file, replicates every row once per industry
** code in the list below, attaches the industry-specific QWI data, and
** recomputes the per-capita outcomes at the industry level.
if 1 {
	
	use "$Rep_smokelabor/1_build/regdata/county_quarter.dta", clear 
				
	* remove the county-level labor variables (and anything derived from them);
	* the seer_ variables are kept because seer_pop is the denominator below
	drop *qwi_* *lau_* 
	
	* industry codes, one copy of each county-quarter row per code
	local naics2 11 21 22 23 31-33 42 44-45 48-49 51 52 53 54 55 56 61 62 71 72 81 92
	local n_naics2 : word count `naics2'
	
	sort countyfip rfrnc_qtros
	gen _row = _n
	expand `n_naics2' 
	bys _row: gen _slot=_n 
	
	* map copy number onto the industry code at the same position in the list
	gen industry="."
	forvalues j = 1/`n_naics2' {
		local code : word `j' of `naics2'
		replace industry="`code'" if _slot==`j'
	}
				
	tab industry 
	drop _row _slot
	
	* industry by county by quarter QWI outcomes
	merge 1:1 countyfip rfrnc_yr rfrnc_qtroy industry using "$Rep_smokelabor/1_build/qwi/proc/qwi_naics2_county_quarterly.dta", keep(match master) nogen
		
	* scale outcomes by seer_pop as carried over from the county-quarter file
	gen pc_qwi_payroll = qwi_payroll/seer_pop
	gen pmil_qwi_emptotal = qwi_emptotal*1000000/seer_pop
	
	* numeric index for industry groups 
	egen g_industry=group(industry)
	
	* write out
	compress
	saveold "$Rep_smokelabor/1_build/regdata/naics2_county_quarter.dta", replace

	
	
}
